This report analyzes metastatic melanoma data from MetMap project, as published in “A metastasis map of human cancer cell lines” by Jin, et al 2020., and is described in further detail on the MetMap website.
The goals of this analysis are: 1) Identify which cell lines have high/low metastatic potential to specific organs. 1) Correlate metastatic potential (CI.95) with penetrance. 1) Compare across organs and cell lines. 1) Highlight patterns of multi-organ metastasis vs. organotropism.
The file
Supplementary Table 03 MetMap cell line annotation.xlsx,
which was downloaded from the Metmap download
page contains the “metadata” describing all cell lines in this
study. We will read this file here and select only the melanoma cell
lines for further analysis.
read_excel("./data/metmap_data/Supplementary Table 03 MetMap cell line annotation.xlsx") %>%
dplyr::filter(cancer_type == "melanoma") %>%
as.data.frame() -> metmap_melanoma_metadata
# View(metmap_melanoma_metadata)
DT::datatable(metmap_melanoma_metadata)
We read the file
Supplementary Table 04 MetMap 500 met potential.xlsx, which
was downloaded from the Metmap download
page and combine the different Excel sheets into a single
dataframe:
# Normalize organ placement around a circle (petals at fixed angles)
# organ_positions <- data.frame(
# organ = c("brain", "lung", "liver", "bone", "kidney"),
# angle = seq(0, 2 * pi, length.out = 6)[1:5] # 5 equally spaced directions
# )
sheet_names <- excel_sheets("./data/metmap_data/Supplementary Table 04 MetMap 500 met potential.xlsx")
dplyr::bind_rows(
## brain
read_excel("./data/metmap_data/Supplementary Table 04 MetMap 500 met potential.xlsx",
sheet = 1) %>%
dplyr::rename(CCLE_name = names(.)[1]) %>%
dplyr::mutate(organ = gsub("metp500\\.", "", sheet_names[1])),
## lung
read_excel("./data/metmap_data/Supplementary Table 04 MetMap 500 met potential.xlsx",
sheet = 2) %>%
dplyr::rename(CCLE_name = names(.)[1]) %>%
dplyr::mutate(organ = gsub("metp500\\.", "", sheet_names[2])),
## liver
read_excel("./data/metmap_data/Supplementary Table 04 MetMap 500 met potential.xlsx",
sheet = 3) %>%
dplyr::rename(CCLE_name = names(.)[1]) %>%
dplyr::mutate(organ = gsub("metp500\\.", "", sheet_names[3])),
## bone
read_excel("./data/metmap_data/Supplementary Table 04 MetMap 500 met potential.xlsx",
sheet = 4) %>%
dplyr::rename(CCLE_name = names(.)[1]) %>%
dplyr::mutate(organ = gsub("metp500\\.", "", sheet_names[4])),
## kidney
read_excel("./data/metmap_data/Supplementary Table 04 MetMap 500 met potential.xlsx",
sheet = 5) %>%
dplyr::rename(CCLE_name = names(.)[1]) %>%
dplyr::mutate(organ = gsub("metp500\\.", "", sheet_names[5]))) %>%
dplyr::filter(CCLE_name %in% metmap_melanoma_metadata$CCLE_name) %>%
# dplyr::left_join(organ_positions, by = "organ") %>%
# dplyr::mutate(
# x0 = 0, # center x
# y0 = 0, # center y
# a = CI.95 / 2, # semi-major axis = length of petal
# b = penetrance / 2, # semi-minor axis = width of petal
# angle = angle # angle of petal orientation
# ) %>%
as.data.frame() -> metmap_combined
## save the converted metadata table ( for Takis)
metmap_combined %>%
dplyr::left_join(metmap_melanoma_metadata, by = "CCLE_name") %>%
dplyr::select(contains("name"), contains("id"), everything()) %>%
dplyr::select(-cancer_subtype) %>%
as_tibble() %>%
writexl::write_xlsx(path = "./data/metmap_melanoma_scores.xlsx")
# names(metmap_combined)
# unique(metmap_combined$organ)
DT::datatable(metmap_combined)
metmap_combined %>%
ggplot(aes(x = organ, y = CI.95, fill = organ)) +
geom_boxplot() +
geom_jitter(width = 0.2, alpha = 0.5) +
theme_minimal() +
labs(
title = "Distribution of Metastatic Potential (CI.95) Across Organs",
x = "Organ",
y = "CI.95") +
theme(legend.position = "none")
metmap_combined %>%
ggplot(aes(x = organ, y = penetrance, fill = organ)) +
geom_boxplot() +
geom_jitter(width = 0.2, alpha = 0.5) +
theme_minimal() +
labs(
title = "Distribution of Penetrance Across Organs",
x = "Organ",
y = "Penetrance") +
theme(legend.position = "none")
metmap_combined %>%
dplyr::arrange(desc(CI.95)) %>%
dplyr::mutate(rank = row_number()) %>%
ggplot(aes(x = rank, y = CI.95, fill = organ)) +
geom_col() +
theme_classic() +
labs(title = "Waterfall Plot of Metastatic Potential (CI.95)",
x = "Ranked Cell Line Samples",
y = "CI.95 (Metastatic Potential)",
fill = "Organ") +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank()) -> p1
# plotly::ggplotly(p1)
p1
metmap_combined %>%
dplyr::arrange(penetrance) %>%
dplyr::mutate(rank = row_number()) %>%
ggplot(aes(x = rank, y = penetrance, fill = organ)) +
geom_col() +
theme_classic() +
labs(title = "Waterfall Plot of Penetrance",
x = "Ranked Cell Line Samples",
y = "Penetrance",
fill = "Organ") +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank()) -> p2
# plotly::ggplotly(p2)
p2
metmap_combined %>%
dplyr::select(CCLE_name, CI.95, organ) %>%
tidyr::pivot_wider(
names_from = organ,
values_from = CI.95) %>%
tibble::column_to_rownames("CCLE_name") %>%
as.data.frame() -> df_meta
metmap_scaled1 <- t(scale(t(df_meta))) # scale rows, not columns
metmap_scaled1[is.nan(metmap_scaled1)] <- 0
pheatmap(metmap_scaled1,
fontsize_row = 6,
cluster_rows = TRUE,
cluster_cols = TRUE,
main = "Z-scored CI.95 Metastatic Potential")
metmap_combined %>%
dplyr::select(CCLE_name, penetrance, organ) %>%
tidyr::pivot_wider(
names_from = organ,
values_from = penetrance) %>%
tibble::column_to_rownames("CCLE_name") %>%
as.data.frame() -> df_pen
metmap_scaled2 <- t(scale(t(df_pen))) # scale rows, not columns
metmap_scaled2[is.nan(metmap_scaled2)] <- 0
pheatmap(metmap_scaled2,
fontsize_row = 6,
cluster_rows = TRUE,
cluster_cols = TRUE,
main = "Z-scored Penetrance")
metmap_combined %>%
ggplot(aes(x = CI.95, y = penetrance, color = organ)) +
geom_point(alpha = 0.7) +
geom_vline(xintercept = -2, color = "red", linetype = "dashed") +
geom_smooth(method = "lm", se = FALSE, linetype = "dashed") +
# facet_wrap(~ organ) +
theme_minimal() +
stat_cor(method = "spearman") +
labs(
title = "Correlation between Metastatic Potential (CI.95) and Penetrance",
x = "Metastatic Potential (CI.95)",
y = "Penetrance")
metmap_combined %>%
ggplot(aes(x = CI.95, y = penetrance)) +
geom_point(alpha = 0.7) +
geom_vline(xintercept = -2, color = "red", linetype = "dashed") +
geom_smooth(method = "lm", se = FALSE, linetype = "dashed") +
# facet_wrap(~ organ) +
theme_minimal() +
stat_cor(method = "spearman") +
labs(
title = "Correlation between Metastatic Potential (CI.95) and Penetrance",
x = "Metastatic Potential (CI.95)",
y = "Penetrance")
Below we create faceted scatter plots with regression line for each
organ. The vertical red dashed line at
Metastatic Potential = -2 represents where cell lines are
considered “metastatic” or not.
metmap_combined %>%
ggplot(aes(x = CI.95, y = penetrance, color = organ)) +
geom_point(alpha = 0.7) +
geom_vline(xintercept = -2, color = "red", linetype = "dashed") +
geom_smooth(method = "lm", se = FALSE, linetype = "dashed") +
facet_wrap(~ organ) +
theme_minimal() +
stat_cor(method = "spearman", label.x = -4, label.y = 1.05, size = 3.5) +
labs(
title = "Correlation between Metastatic Potential (CI.95) and Penetrance",
x = "Metastatic Potential (CI.95)",
y = "Penetrance")
# Ensure organ levels are ordered
organ_levels <- c("brain", "lung", "liver", "bone", "kidney")
# Normalize angles for organs around the circle using penetrance as angular width
metmap_combined %>%
dplyr::mutate(organ = factor(organ, levels = organ_levels)) %>%
dplyr::group_by(CCLE_name) %>%
# Calculate cumulative angles per cell line
dplyr::mutate(
total_penetrance = sum(penetrance),
fraction = penetrance / total_penetrance,
angle_width = 2 * pi * fraction,
angle_start = cumsum(lag(angle_width, default = 0)),
angle_end = angle_start + angle_width,
r0 = 0,
r = CI.95) %>% ungroup() -> metmap_arc
# Plot using geom_arc_bar
ggplot(metmap_arc) +
geom_arc_bar(
aes(
x0 = 0, y0 = 0,
r0 = r0, r = r,
start = angle_start, end = angle_end,
fill = organ),
color = "black", alpha = 0.8) +
coord_fixed() +
facet_wrap(~CCLE_name, ncol = 5) +
theme_minimal() +
theme(
strip.text = element_text(size = 8),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
panel.grid = element_blank()) +
labs(
title = "Rose plots of MetMap melanoma cell lines by metastatized organ",
fill = "Organ")
Test/Approach: Hierarchical clustering or UMAP
Purpose: Reveal whether certain cell lines cluster by metastatic behavior across organs.
umap_result <- umap(metmap_scaled1)
umap_df <- as.data.frame(umap_result$layout)
umap_df$CCLE_name <- rownames(df_meta)
ggplot(umap_df, aes(x = V1, y = V2, label = CCLE_name)) +
geom_point(alpha = 0.7) +
geom_text_repel(size = 3, max.overlaps = 50) +
theme_minimal() +
labs(title = "UMAP of Cell Lines by Metastatic Potential",
x = "UMAP1", y = "UMAP2")
umap_result <- umap(metmap_scaled2)
umap_df <- as.data.frame(umap_result$layout)
umap_df$CCLE_name <- rownames(df_meta)
ggplot(umap_df, aes(x = V1, y = V2, label = CCLE_name)) +
geom_point(alpha = 0.7) +
geom_text_repel(size = 3, max.overlaps = 50) +
theme_minimal() +
labs(title = "UMAP of Cell Lines by Penetrance",
x = "UMAP1", y = "UMAP2")
rownames(metmap_scaled1) <- paste0("CI.95_", rownames(metmap_scaled1))
rownames(metmap_scaled2) <- paste0("pen_", rownames(metmap_scaled2))
joint_matrix <- cbind(metmap_scaled1, metmap_scaled2)
joint_scaled <- scale(joint_matrix)
umap_result <- umap(joint_scaled)
umap_df <- as.data.frame(umap_result$layout)
umap_df$CCLE_name <- rownames(df_meta)
ggplot(umap_df, aes(x = V1, y = V2, label = CCLE_name)) +
geom_point(alpha = 0.7) +
geom_text_repel(size = 3, max.overlaps = 50) +
theme_minimal() +
labs(title = "Joint UMAP of Cell Lines by Penetrance and Metastatic Potential",
x = "UMAP1", y = "UMAP2")
One panel per organ.
Y-axis: Cell lines (ordered by mean CI.95)
Dot size: Penetrance
Dot color: CI.95 gradient
Purpose: Identify organ-specific high-metastasizing cell lines.
library(forcats)
# Reorder CCLE_name within each organ by mean CI.95
metmap_plotdata <- metmap_combined %>%
group_by(organ, CCLE_name) %>%
summarize(
CI.95 = mean(CI.95, na.rm = TRUE),
penetrance = mean(penetrance, na.rm = TRUE)
) %>%
ungroup() %>%
group_by(organ) %>%
mutate(CCLE_name = fct_reorder(CCLE_name, CI.95)) # order within organ
# Plot
ggplot(metmap_plotdata, aes(x = 1, y = CCLE_name)) +
geom_point(aes(size = penetrance, color = CI.95)) +
facet_wrap(~ organ, scales = "free_y") +
scale_color_gradient2(low = "blue", mid = "white", high = "red", midpoint = 0) +
scale_size(range = c(1, 6)) +
theme_minimal() +
theme(
axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.text.y = element_text(size = 5),
strip.text = element_text(face = "bold"),
panel.spacing = unit(1, "lines")
) +
labs(
y = "Cell Line (ordered by CI.95)",
color = "Metastatic Potential (CI.95)",
size = "Penetrance",
title = "Organ-specific Metastasis Profiles of Melanoma Cell Lines"
) -> p3
plotly::ggplotly(p3)
We will run various statistical tests to determine the nature of the relationships between metastatic potential, penetrance and organ in melanoma cell lines. The following is a little background on the statistical tests that we will perform:
Kruskal-Wallis Test (Global Non-Parametric Test)
Purpose: To test whether there is a statistically significant difference in the distribution of a continuous variable (like CI.95 or penetrance) across more than two independent groups (e.g., organs: brain, liver, lung, etc.).
Why use Kruskal-Wallis instead of ANOVA?
The variable (e.g., CI.95) is log-transformed and likely non-normally distributed.
Kruskal-Wallis is a non-parametric test and does not assume normality or equal variance.
What it tells you: At least one group differs from the others — but not which one(s).
Dunn’s Test (Post-hoc Pairwise Comparisons)
Purpose: To identify which specific pairs of groups differ after a Kruskal-Wallis test has found a significant global difference.
Why not use multiple Wilcoxon tests directly?
Running many pairwise tests inflates the Type I error rate (false positives).
Bonferroni Correction (Control for Multiple Testing)
Why needed: We’re doing multiple pairwise comparisons (e.g., brain vs. liver, brain vs. lung, etc.).
What it does: Adjusts the p-value threshold to keep the family-wise error rate controlled.
For example, if you do 10 tests, Bonferroni adjusts the significance threshold from 0.05 to 0.005.
Effect: More conservative (reduces false positives, may increase false negatives).
# Global correlation
cor_test_global <- cor.test(metmap_combined$CI.95, metmap_combined$penetrance, method = "spearman")
print(cor_test_global)
#>
#> Spearman's rank correlation rho
#>
#> data: metmap_combined$CI.95 and metmap_combined$penetrance
#> S = 184049, p-value < 2.2e-16
#> alternative hypothesis: true rho is not equal to 0
#> sample estimates:
#> rho
#> 0.86196
cor_results_per_organ <- metmap_combined %>%
dplyr::group_by(organ) %>%
dplyr::summarize(
spearman_rho = cor(CI.95, penetrance, method = "spearman"),
p_value = cor.test(CI.95, penetrance, method = "spearman")$p.value)
print(cor_results_per_organ)
#> # A tibble: 5 × 3
#> organ spearman_rho p_value
#> <chr> <dbl> <dbl>
#> 1 bone 0.920 4.65e-17
#> 2 brain 0.867 4.54e-13
#> 3 kidney 0.899 3.66e-15
#> 4 liver 0.915 1.36e-16
#> 5 lung 0.825 6.06e-11
Test: Kruskal-Wallis test (non-parametric)
Why: CI.95 is continuous, log-transformed, and likely non-normal. We're comparing distributions across multiple organs.
Post-hoc: Dunn's test with Bonferroni correction.
# Kruskal-Wallis test for CI.95 across organs
kruskal_result <- kruskal.test(CI.95 ~ organ, data = metmap_combined)
print(kruskal_result)
#>
#> Kruskal-Wallis rank sum test
#>
#> data: CI.95 by organ
#> Kruskal-Wallis chi-squared = 2.4835, df = 4, p-value = 0.6476
# Dunn's test with Bonferroni correction
dunn_result <- dunnTest(CI.95 ~ organ, data = metmap_combined, method = "bonferroni")
print(dunn_result)
#> Comparison Z P.unadj P.adj
#> 1 bone - brain -0.43271014 0.6652254 1
#> 2 bone - kidney -1.24017817 0.2149095 1
#> 3 brain - kidney -0.80746803 0.4193969 1
#> 4 bone - liver -0.71957378 0.4717875 1
#> 5 brain - liver -0.28686364 0.7742167 1
#> 6 kidney - liver 0.52060439 0.6026424 1
#> 7 bone - lung -1.33097003 0.1831989 1
#> 8 brain - lung -0.89825989 0.3690470 1
#> 9 kidney - lung -0.09079186 0.9276580 1
#> 10 liver - lung -0.61139625 0.5409373 1
Test: Kruskal-Wallis or ANOVA (depending on distribution)
Alt: Chi-squared or Fisher's exact test if binarized penetrance (e.g. high ≥ 0.5, low < 0.5).
# Kruskal-Wallis test across organs
kruskal_penetrance <- kruskal.test(penetrance ~ organ, data = metmap_combined)
print(kruskal_penetrance)
#>
#> Kruskal-Wallis rank sum test
#>
#> data: penetrance by organ
#> Kruskal-Wallis chi-squared = 13.725, df = 4, p-value = 0.008228
# Dunn's test with Bonferroni correction
dunn_result <- dunnTest(penetrance ~ organ, data = metmap_combined, method = "bonferroni")
print(dunn_result)
#> Comparison Z P.unadj P.adj
#> 1 bone - brain 0.2600872 0.794796486 1.00000000
#> 2 bone - kidney -0.5338120 0.593471611 1.00000000
#> 3 brain - kidney -0.7938992 0.427254097 1.00000000
#> 4 bone - liver -1.6423486 0.100517781 1.00000000
#> 5 brain - liver -1.9024358 0.057114201 0.57114201
#> 6 kidney - liver -1.1085366 0.267630143 1.00000000
#> 7 bone - lung -2.9106391 0.003606903 0.03606903
#> 8 brain - lung -3.1707264 0.001520583 0.01520583
#> 9 kidney - lung -2.3768272 0.017462269 0.17462269
#> 10 liver - lung -1.2682906 0.204694209 1.00000000
Session Info
sessionInfo()
#> R version 4.5.0 (2025-04-11)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.2 LTS
#>
#> Matrix products: default
#> BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.12.0
#> LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.12.0 LAPACK version 3.12.0
#>
#> locale:
#> [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
#> [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
#> [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
#> [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
#> [9] LC_ADDRESS=C LC_TELEPHONE=C
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
#>
#> time zone: Europe/Brussels
#> tzcode source: system (glibc)
#>
#> attached base packages:
#> [1] grid stats graphics grDevices utils datasets methods
#> [8] base
#>
#> other attached packages:
#> [1] forcats_1.0.0 DT_0.33 umap_0.2.10.0 ggpubr_0.6.0
#> [5] plotly_4.10.4 pheatmap_1.0.13 FSA_0.10.0 scales_1.4.0
#> [9] ggforce_0.4.2 ggvenn_0.1.10 readr_2.1.5 writexl_1.5.4
#> [13] readxl_1.4.5 ggrepel_0.9.6.9999 ggplot2_3.5.2 tibble_3.2.1
#> [17] tidyr_1.3.1 dplyr_1.1.4
#>
#> loaded via a namespace (and not attached):
#> [1] gtable_0.3.6 xfun_0.52 bslib_0.9.0 htmlwidgets_1.6.4
#> [5] rstatix_0.7.2 lattice_0.22-7 tzdb_0.5.0 crosstalk_1.2.1
#> [9] vctrs_0.6.5 tools_4.5.0 generics_0.1.4 pkgconfig_2.0.3
#> [13] Matrix_1.7-3 data.table_1.17.4 RColorBrewer_1.1-3 lifecycle_1.0.4
#> [17] compiler_4.5.0 farver_2.1.2 carData_3.0-5 htmltools_0.5.8.1
#> [21] sass_0.4.10 yaml_2.3.10 lazyeval_0.2.2 Formula_1.2-5
#> [25] pillar_1.10.2 car_3.1-3 jquerylib_0.1.4 MASS_7.3-65
#> [29] openssl_2.3.3 cachem_1.1.0 dunn.test_1.3.6 abind_1.4-8
#> [33] nlme_3.1-168 RSpectra_0.16-2 tidyselect_1.2.1 digest_0.6.37
#> [37] purrr_1.0.4 splines_4.5.0 labeling_0.4.3 polyclip_1.10-7
#> [41] fastmap_1.2.0 cli_3.6.5 magrittr_2.0.3 utf8_1.2.5
#> [45] dichromat_2.0-0.1 broom_1.0.8 withr_3.0.2 backports_1.5.0
#> [49] rmarkdown_2.29 httr_1.4.7 reticulate_1.42.0 ggsignif_0.6.4
#> [53] cellranger_1.1.0 png_0.1-8 askpass_1.2.1 hms_1.1.3
#> [57] evaluate_1.0.3 knitr_1.50 viridisLite_0.4.2 mgcv_1.9-3
#> [61] rlang_1.1.6 Rcpp_1.0.14 glue_1.8.0 tweenr_2.0.3
#> [65] rstudioapi_0.17.1 jsonlite_2.0.0 R6_2.6.1